from ultralytics import YOLO
import numpy as np
import cv2
from matplotlib import pyplot as plt
from jetcam.csi_camera import CSICamera
import time

model = YOLO("yolo26n.pt")  # load a pretrained model (recommended for training)

model.fuse()   # Fuses the model for faster inference

from jetbot import Camera

camera = Camera.instance(capture_device=0, width=640, height=640, capture_width=640, capture_height=640, capture_fps=30, flip_method = 0)

classes = open('coco.names').read().strip().split('\n')

image = camera.value
detections = model(image, conf=0.5)

try:
    
    for box in detections[0].boxes:
        print('predected class is:' + classes[int(box.cls[0].item())] + ' with confidence: ' +  str(box.conf.item()))

except:
    print("No object Detected")
        
#print(detections)

from matplotlib import pyplot as plt


width = camera.width
height = camera.height
for det in detections[0]:
    #bbox = det['bbox']
    print(classes[int(det.boxes.cls.item())])
    bbox = det.boxes.xyxyn[0]
    myImage = cv2.rectangle(image, (int(width * bbox[0].item()), int(height * bbox[1].item())), (int(width * bbox[2].item()), int(height * bbox[3].item())), (0, 255, 0), 5)
        
try:
    plt.imshow(myImage)

except:
    print("No object Detected")

from IPython.display import display
import ipywidgets.widgets as widgets

detections_widget = widgets.Textarea()

try:
    detections_widget.value = str(classes[int(detections[0].boxes[0].cls.item())])

except:
    print("No object Detected")

display(detections_widget)

import torch
import torchvision
import torch.nn.functional as F
import cv2
import numpy as np

collision_model = torchvision.models.alexnet(weights='DEFAULT')
collision_model.classifier[6] = torch.nn.Linear(collision_model.classifier[6].in_features, 2)
collision_model.load_state_dict(torch.load('../collision_avoidance/best_model.pth'))
device = torch.device('cuda')
collision_model = collision_model.to(device)

mean = 255.0 * np.array([0.485, 0.456, 0.406])
stdev = 255.0 * np.array([0.229, 0.224, 0.225])

normalize = torchvision.transforms.Normalize(mean, stdev)

def preprocess(camera_value):
    global device, normalize
    x = camera_value
    x = cv2.resize(x, (224, 224))
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
    x = x.transpose((2, 0, 1))
    x = torch.from_numpy(x).float()
    x = normalize(x)
    x = x.to(device)
    x = x[None, ...]
    return x

from jetbot import Robot

robot = Robot()

from jetbot import bgr8_to_jpeg

blocked_widget = widgets.FloatSlider(min=0.0, max=1.0, value=0.0, description='blocked')
image_widget = widgets.Image(format='jpeg', width=camera.width, height=camera.height)
label_widget = widgets.Text(value='person', description='tracked label')
speed_widget = widgets.FloatSlider(value=0.4, min=0.0, max=1.0, description='speed')
turn_gain_widget = widgets.FloatSlider(value=0.8, min=0.0, max=2.0, description='turn gain')

display(widgets.VBox([
    widgets.HBox([image_widget, blocked_widget]),
    label_widget,
    speed_widget,
    turn_gain_widget
]))

width = int(image_widget.width)
height = int(image_widget.height)

def detection_center(detection):
    """Computes the center x, y coordinates of the object"""
    bbox = detection.boxes.xyxyn[0]
    center_x = (bbox[0].item() + bbox[2].item()) / 2.0 - 0.5
    center_y = (bbox[1].item() + bbox[3].item()) / 2.0 - 0.5
    return (center_x, center_y)
    
def norm(vec):
    """Computes the length of the 2D vector"""
    return np.sqrt(vec[0]**2 + vec[1]**2)

def closest_detection(detections):
    """Finds the detection closest to the image center"""
    closest_detection = None
    for det in detections:
        center = detection_center(det)
        if closest_detection is None:
            closest_detection = det
        elif norm(detection_center(det)) < norm(detection_center(closest_detection)):
            closest_detection = det
    return closest_detection
        
def execute(change):
    image = change['new']
    
    # execute collision model to determine if blocked
    collision_output = collision_model(preprocess(image)).detach().cpu()
    prob_blocked = float(F.softmax(collision_output.flatten(), dim=0)[0])
    blocked_widget.value = prob_blocked
    
    # turn left if blocked
    if prob_blocked > 0.5:
        robot.left(0.6)
        image_widget.value = bgr8_to_jpeg(image)
        return
        
    # compute all detected objects
    detections = model(image, conf=0.5)
    
    # draw all detections on image
    try:
        for det in detections[0]:
            bbox = det.boxes.xyxyn[0]
            v2.rectangle(image, (int(width * bbox[0].item()), int(height * bbox[1].item())), (int(width * bbox[2].item()), int(height * bbox[3].item())), (0, 255, 0), 5)
    except:
        print("No object Detected")

    
    # select detections that match selected class label
    matching_detections = [d for d in detections[0] if classes[int(d.boxes.cls.item())] == label_widget.value]
    
    
    # get detection closest to center of field of view and draw it
    det = closest_detection(matching_detections)
    if det is not None:
        bbox = det.boxes.xyxyn[0]
        cv2.rectangle(image, (int(width * bbox[0].item()), int(height * bbox[1].item())), (int(width * bbox[2].item()), int(height * bbox[3].item())), (0, 255, 0), 5)
    
    
        
    # otherwise go forward if no target detected
    if det is None:
        robot.forward(float(speed_widget.value))
        
    # otherwise steer towards target
    else:
        # move robot forward and steer proportional target's x-distance from center
        center = detection_center(det)
        robot.set_motors(
            float(speed_widget.value + turn_gain_widget.value * center[0]),
            float(speed_widget.value - turn_gain_widget.value * center[0])
        )
    
    # update image widget
    image_widget.value = bgr8_to_jpeg(image)
    
execute({'new': camera.value})

camera.unobserve_all()
camera.observe(execute, names='value')

import time

camera.unobserve_all()
time.sleep(1.0)
robot.stop()

camera.stop()

Object Following - Live Demo¶

Compute detections on single camera image¶

Display detections in text area¶

Control robot to follow central object¶